library(ape)
library(phangorn)
library(caper)
library(tidyverse)
library(ggtree)
library(picante)
library(brms)
library(phangorn)
library(phytools)
library(treeio)
library(MASS)
library(car)
library(corrplot)
library(emmeans)
library(broom)
library(ggdist)
library(tidybayes)
library(raster)
library(sf)
library(exactextractr)
library(performance)##Check ultrametric and/or fix
check_and_fix_ultrametric <- function(phy){
if (!is.ultrametric(phy)){
vv <- vcv.phylo(phy)
dx <- diag(vv)
mxx <- max(dx) - dx
for (i in 1:length(mxx)){
phy$edge.length[phy$edge[,2] == i] <- phy$edge.length[phy$edge[,2] == i] + mxx[i]
}
if (!is.ultrametric(phy)){
stop("Ultrametric fix failed\n")
}
}
return(phy)
}
##Removed duplicated tips
remove_duplicate_tips<-function(tree){
for(spe in unique(tree$tip.label)){
pos<-grep(paste("\\b",spe,"\\b",sep=""),tree$tip.label)
if(length(pos)>1){
rem<-pos[2:length(pos)]
tree<-ape::drop.tip(phy=tree,tip=rem)
}
}
return(tree)
}
## Function for renaming tips
rename.tips.phylo <- function(tree, names) {
tree$tip.label <- names
return(tree)
}
#Stardarize variables
standard_varibles<-function(frame_data){
for(nom in names(frame_data)){
frame_data<-data.frame(frame_data)
if(class(frame_data[,nom])!="numeric"){next}
frame_data[,nom]<-scale(frame_data[,nom])[,1]
}
return(frame_data)
}
#Standard error function
se <- function(x) sd(x)/sqrt(length(x))
#Mean function
meanfun <- function(data, i){
d <- data[i, ]
return(mean(d))
}
#Variation coefficient
var_coef <- function(x, na.rm = FALSE) {
sd(x, na.rm=na.rm) / mean(x, na.rm=na.rm)
}We are going to load the phylogenies of the two main families (Araneidae and Theridiidae) obtained in BEAST to removed duplicated tips and prune some outgroups to generate a single phylogeny for further analyses.
#Load Araneidae tree
mcc_tree<-read.nexus("araneidae_new_final.tre")
#load fixed tip names
nam_tree<-read_csv("tip_names_araneidae.csv",col_names=T)
mcc_tree$tip.label<-nam_tree$corrected_name
###Remove repeated tips
tree_removedTips<-remove_duplicate_tips(mcc_tree)
##Remove tips to mix with Theridiidae tree
core<-extract.clade(phy=tree_removedTips,node=c(194), collapse.singles = TRUE,interactive = FALSE)
outgroups<- tree_removedTips$tip.label[which(tree_removedTips$tip.label %in% core$tip.label==FALSE)]
outgroups_araneidae<-outgroups
tree_removedTips<-drop.tip(tree_removedTips,outgroups)#Load Theridiidae tree
tree_theridiidae<-read.nexus("total_Theridiidae_tree.tre")
#load fixed tip names
nam_tree<-read_delim("theridiidae_tips.txt",col_names=F)
tree_theridiidae$tip.label<-nam_tree$X2
#Remove repeated tips
tree_removed_theridiidae<-remove_duplicate_tips(tree_theridiidae)
#remove problematic tips
problematic_tips<-c("Chrysso_albipes","Chrysso_sp","Erigone_dentosa")
tree_removed_theridiidae<-drop.tip(tree_removed_theridiidae,c(problematic_tips))
#Identify the outgroup
plotTree(tree_removed_theridiidae) #nodelabels()
outgroups<-extract.clade(phy=tree_removed_theridiidae, node=304, root.edge = 0, collapse.singles = TRUE,interactive = FALSE) #Keep an eye on the node
outgroups_theridiidae<-outgroupsNow with the two phylogenies, we are going to join them fro further analyses
calib<-makeChronosCalib(tree_removed_theridiidae, age.min = max(node.depth.edgelength(tree_removedTips)), age.max = max(node.depth.edgelength(tree_removedTips)))
tmp_t<-chronos(tree_removed_theridiidae, lambda = 1, model = "correlated", quiet = FALSE,
calibration = calib,
control = chronos.control())##
## Setting initial dates...
## Fitting in progress... get a first set of estimates
## (Penalised) log-lik = -628.3153
## Optimising rates... dates... -628.3153
##
## log-Lik = -628.3153
## PHIIC = 2190.63
joint_trees_outgroups<-bind.tree(tree_removedTips,tmp_t, interactive = FALSE)
joint_trees<-bind.tree(tree_removedTips, ape::drop.tip(phy=tmp_t,outgroups$tip.label), interactive = FALSE)
# get scaled edge.length
joint_trees$edge.length <- joint_trees$edge.length / (max(joint_trees$edge.length))
#Remove duplicate tips
joint_trees<-remove_duplicate_tips(joint_trees)
#Check that the final tree is ultrametric
joint_trees<-check_and_fix_ultrametric(joint_trees)#load the csv file
join_dataset<-read_csv("data_total.csv",col_names=T)
#replace spaces in species names
join_dataset$species<-gsub(pattern=" ", replacement="_",join_dataset$species)
#keep unique rows
join_dataset<-distinct(join_dataset,species,.keep_all = TRUE)
#Tranform presence in islands to a binary variable
join_dataset<-join_dataset %>% mutate(bin_island=ifelse(cat_island=="island"|cat_island=="island_continent",1,0))
#replace spaces in species names on the tree
join_tree<-multi2di(joint_trees)
join_tree$tip.label<-gsub(pattern=" ", replacement="_",join_tree$tip.label)
##Check if the table match the tree tips
#remove species that are not in the phylogeny
join_dataset<-join_dataset[join_dataset$species %in% join_tree$tip.label,]
##Add species with no information into the phylogeny, like XX_sp
for(spe in unique(join_tree$tip.label)){
#Remove
if(spe %in% join_dataset$species==FALSE){
print(spe)
join_dataset<-join_dataset %>% add_row(species=spe)
}
}
#Let's modify the dataset to deal with colour polytipic species
join_dataset$polymorphism[which(join_dataset$polymorphism %in% c("polytipic","possible polytipic","pattern variable")==TRUE)]<-NA
join_dataset<-join_dataset %>% filter(!is.na(polymorphism))
dataset_all_species_phylogeny<-join_datasetNow we have a single and a dataset that match each other.
Let’s see how is the presence of colour polymorphism present in the phylogeny
#Change tree name
to_plot_tree<-join_tree
#Find colour polymorphic lineages
otus<-join_dataset %>% filter(polymorphism=="yes") %>% pull(species)
to_plot_tree<-groupOTU(to_plot_tree, otus)
df_polymorphism<-data.frame(join_dataset$polymorphism)
#df_island<-data.frame(as.character(join_dataset$bin_island))
rownames(df_polymorphism)<-join_dataset$species
#plot tree with names
p<-ggtree(to_plot_tree, layout='circular') + geom_tiplab()
#pdf("total_tree_names.pdf", width=20,height=20)
plot(p)#dev.off()
#plot tree colour polymorphism
p<-ggtree(to_plot_tree, layout='circular')
#pdf("total_tree_polymorphism.pdf", width=20,height=20)
gheatmap(p, df_polymorphism, offset=.001, width=.08,colnames = FALSE, colnames_offset_y = 1)+scale_fill_manual(values=c("#1ABEC6","#FF5B00"),name="Presence of\ncolour polymorphism")Arachnids is one of the groups with the poorest geographic information available in public databases.For instance, in our data ~51% of the species has less than 50 geographical records
species_points<-join_dataset %>% drop_na(n_points)
species_geo<-nrow(species_points[species_points$n_points<50,])/nrow(species_points)*100
print(paste0(species_geo,"%"," of the species with geographical information has less than 50 geographical records"))## [1] "51.5625% of the species with geographical information has less than 50 geographical records"
To account for this, we decided to calculated the mean and its 95% confidence interval (CI) for the number of geographical records available for all the species. We excluded species from the subsequent analyses that fell outside the lower CI.
#Due to high vari
l_points<-na.omit(log(join_dataset$n_points))
##Let's calculate the 95% around the mean
library(boot)
data <- data.frame(xs = l_points)
bo <- boot(data[, "xs", drop = FALSE], statistic=meanfun, R=5000)
mean_ci<-boot.ci(bo, conf=0.95, type="bca")
ggplot(tibble(x=bo$t[,1]), aes(x=x)) +geom_density()+geom_segment(x=mean_ci$bca[4],xend=mean_ci$bca[5],y=0,yend=0,color="blue",size=2,lineend="round")all the predictors seems skewed or not uniform distributed, let’s modify some predictors that may affect the regression due to their non-normal distribution
datos_filtered$T_centroid_lat<-abs(datos_filtered$centroid_lat)
datos_filtered$T_lat_range<-sqrt(abs(datos_filtered$lat_range))
datos_filtered$T_area_polygon<-sqrt(datos_filtered$area_polygon+1)
datos_filtered$T_lat_range_wsc<-abs(datos_filtered$lat_range_wsc)
datos_filtered$T_area_countries_wsc<-sqrt(datos_filtered$area_countries_wsc)
datos_filtered$T_points<-log(datos_filtered$n_points)
datos_filtered$T_bole<-log(datos_filtered$bole_female)Remove species from islands that can affect calculations due to their geographic limit for dispersion
Number of colour monomorphic and polymorphic species
##
## no yes
## 59 33
Correlation plot between the variables
cor_matrix <- cor(na.omit(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))
colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )Standardize variable before the analysis, excluding the count variables
#datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()Now, let’s prepare the dataset and tree so they match, this is super important. Your phylogeny names need to match a column of data
Let’s run the models!
Evaluate if the colour monomorphic and colour polymorphic species differ in the number of records
set.seed(30011994)
brm_points <- brm(
n_points ~ polymorphism,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
#data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="points1.rda"
)
pairs(brm_points)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: n_points ~ polymorphism
## Data: datos_filtered_total (Number of observations: 92)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 7.14 0.17 6.82 7.49 1.00 25569 21635
## polymorphismyes 0.55 0.28 0.01 1.12 1.00 26066 22390
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 0.60 0.07 0.47 0.76 1.00 27048 23466
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.040 (95% CI [8.475e-10, 0.160])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in the number of records
Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid
set.seed(30011994)
brm_centroid <- brm(
T_centroid_lat ~ polymorphism,
data = datos_filtered_total,
family = skew_normal(),
#prior = prior,
# data2 = list(A = A),
control = list(adapt_delta = 0.99999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="centroid1.rda"
)
pairs(brm_centroid)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_centroid_lat ~ polymorphism
## Data: datos_filtered_total (Number of observations: 91)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 36.74 2.42 31.91 41.44 1.00 25713 22173
## polymorphismyes -4.90 3.86 -12.52 2.66 1.00 26546 24053
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 18.80 1.53 16.10 22.12 1.00 22032 20805
## alpha -1.73 1.78 -5.22 1.39 1.00 18590 22096
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.017 (95% CI [2.460e-11, 0.081])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in the latitude of the centroid
Evaluate if the colour monomorphic and colour polymorphic species differ in the body length
set.seed(30011994)
brm_bole <- brm(
T_bole ~ polymorphism,
data = datos_filtered_total,
family = gaussian(),
#prior = prior,
#data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="bole1.rda"
)
pairs(brm_bole)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_bole ~ polymorphism
## Data: datos_filtered_total (Number of observations: 91)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.12 0.08 1.97 2.28 1.00 29096 22723
## polymorphismyes 0.16 0.14 -0.11 0.43 1.00 27882 22512
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.62 0.05 0.54 0.72 1.00 28708 23708
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.015 (95% CI [2.925e-12, 0.082])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in body length
let’s Evaluate the association fo the predictors
lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=datos_filtered_total)
check_collinearity(lm_lat_range)The predictors are not collinear, we can use all of them in the models
set.seed(30011994)
brm_latrange_1 <- brm(
T_lat_range ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = gaussian(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.9999, max_treedepth=11)
,chains = 4, cores = 4, iter = 20000
,file="latrange1.rda"
)
pairs(brm_latrange_1)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 90)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 90)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.88 0.38 0.12 1.61 1.00 2319 3305
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 5.78 0.84 4.12 7.43 1.00 28876 28641
## polymorphismyes 0.67 0.31 0.06 1.27 1.00 19696 25067
## T_bole 0.28 0.29 -0.29 0.85 1.00 30353 29554
## T_centroid_lat -0.03 0.01 -0.05 -0.02 1.00 25875 29203
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.02 0.19 0.60 1.36 1.00 2450 2632
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.475 (95% CI [0.177, 0.802])
## Marginal R2: 0.299 (95% CI [0.145, 0.427])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
area_polygon_1 <- brm(
T_area_polygon ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)),
data = datos_filtered_total,
family = skew_normal(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="areapolygon1.rda"
)
pairs(area_polygon_1)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 89)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 89)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 334.81 208.47 17.94 783.59 1.00 8654 16747
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1437.14 669.67 125.38 2773.04 1.00 45615 30231
## polymorphismyes 371.37 248.65 -114.24 866.03 1.00 47901 29163
## T_bole 296.21 230.37 -158.65 748.90 1.00 53549 32232
## T_centroid_lat 8.04 8.09 -8.05 23.77 1.00 46338 31192
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1140.77 105.83 944.89 1359.32 1.00 24904 24081
## alpha 3.57 1.60 1.38 7.67 1.00 21105 16581
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.121 (95% CI [0.010, 0.276])
## Marginal R2: 0.068 (95% CI [0.002, 0.162])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records
set.seed(30011994)
eco_reg_points_1 <- brm(
eco_reg_points ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="ecoregpoints1.rda"
)
pairs(eco_reg_points_1)## Family: poisson
## Links: mu = log
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 90)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 90)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.26 0.12 1.05 1.51 1.00 7774 14771
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.59 0.71 0.18 2.98 1.00 6148 11039
## polymorphismyes 0.42 0.21 0.01 0.84 1.00 6047 11417
## T_bole 0.54 0.23 0.10 0.99 1.00 6184 11667
## T_centroid_lat 0.01 0.01 -0.00 0.02 1.00 6466 11658
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.964 (95% CI [0.948, 0.977])
## Marginal R2: 0.151 (95% CI [0.002, 0.434])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon
eco_reg_polygon_1 <- brm(
eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="ecoregpol1.rda"
)
pairs(eco_reg_polygon_1)## Family: poisson
## Links: mu = log
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 89)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 89)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.33 0.12 1.13 1.58 1.00 5164 9918
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.89 0.74 1.46 4.35 1.00 4746 8849
## polymorphismyes 0.44 0.22 0.01 0.86 1.00 4689 8391
## T_bole 0.57 0.23 0.11 1.03 1.00 4978 9099
## T_centroid_lat -0.01 0.01 -0.02 0.00 1.00 4667 8828
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.985 (95% CI [0.977, 0.991])
## Marginal R2: 0.223 (95% CI [0.017, 0.508])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here
temp_zones_points_1 <- brm(
temp_zones_points~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="climpoint1.rda"
)
pairs(temp_zones_points_1)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 90)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 90)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.12 0.08 0.01 0.30 1.00 8401 15556
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.16 0.24 0.67 1.63 1.00 36082 26672
## polymorphismyes 0.18 0.09 -0.01 0.36 1.00 42445 30413
## T_bole 0.32 0.08 0.15 0.48 1.00 40149 28908
## T_centroid_lat 0.01 0.00 0.00 0.01 1.00 47812 30646
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 52.83 50.14 11.90 195.60 1.00 29584 27642
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.277 (95% CI [0.113, 0.463])
## Marginal R2: 0.233 (95% CI [0.081, 0.392])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
temp_zones_polygon_1 <- brm(
temp_zones_polygon~polymorphism+T_bole+T_centroid_lat+ (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="climpol1.rda"
)
pairs(temp_zones_polygon_1)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 89)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 89)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.09 0.07 0.00 0.26 1.00 11792 18059
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.82 0.25 2.33 3.32 1.00 41709 30165
## polymorphismyes -0.02 0.10 -0.22 0.17 1.00 49572 31171
## T_bole 0.04 0.09 -0.13 0.22 1.00 40894 31970
## T_centroid_lat -0.01 0.00 -0.02 -0.00 1.00 49852 31213
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 10.12 3.72 5.45 18.96 1.00 33367 22063
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.251 (95% CI [0.100, 0.406])
## Marginal R2: 0.208 (95% CI [0.066, 0.356])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
all_models<-NULL
for(mod in c("brm_latrange_1","area_polygon_1","eco_reg_points_1","eco_reg_polygon_1","temp_zones_polygon_1","temp_zones_points_1")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
labs(x="Estimate",y="Models")Plot of the model
paleta1<-c("#1ABEC6","#FF5B00")
dataset_plot<-datos_filtered_total %>% drop_na(T_lat_range|polymorphism|T_bole|T_centroid_lat)
dataset_plot$predict<-predict(brm_latrange_1,type="response")[,"Estimate"]
dataset_plot %>% ggplot(aes(x=polymorphism,y=predict,fill=polymorphism))+geom_point(aes(x=polymorphism,y=T_lat_range),shape = 21,size=3, position = position_jitterdodge(),alpha=0.5)+geom_violin(aes(x=polymorphism,y=T_lat_range),alpha=0.1, position = position_dodge(width = .75),size=1)+
stat_summary(fun = mean,aes(color = polymorphism,group=polymorphism),fun.min = function(x) mean(x) - (2*se(x)),fun.max = function(x) mean(x)+(2*se(x)),geom = "pointrange",shape=22,size=1.5,col="black")+scale_fill_manual(values=paleta1)+scale_colour_manual(values=paleta1)+theme_classic()+labs(x="Colour polymorphism",y="Latitudinal range")To eliminate any false association caused by sampling bias, we repeated the above analyses with a reduced dataset. The subset was created by calculating a linear regression between the number of geographical records and the geographical area of the regions described in the WSC (a positive relationship), and then discarding species outside the lower boundary of the 50% predictive confidence interval (Quantile 0.75 and Quantile 0.25). In this way we only kept species with a small number of records when their WSC calculated range was calculated as very small (Predictive interval subset; supplementary figure 2). This approach is different from using a threshold for the number of points because it acknowledges that some species will have fewer records if their range is very restricted.
Let’s generate the dataset
no_island<-datos_filtered %>% filter(cat_island!="island") %>% drop_na(n_points) %>% drop_na(area_polygon) %>%drop_na(polymorphism)
no_island<-na.omit(no_island)
#no_island[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(no_island[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()
lm_points<-lm(T_points~T_area_countries_wsc,data=no_island)
summary(lm_points)##
## Call:
## lm(formula = T_points ~ T_area_countries_wsc, data = no_island)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6301 -0.8926 -0.0480 0.8971 2.6342
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.9696268 0.5204457 7.627 5.67e-11 ***
## T_area_countries_wsc 0.0004916 0.0000971 5.063 2.80e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.311 on 76 degrees of freedom
## Multiple R-squared: 0.2522, Adjusted R-squared: 0.2424
## F-statistic: 25.64 on 1 and 76 DF, p-value: 2.799e-06
no_island<-cbind(no_island,predict(lm_points,interval="prediction",level=0.50))## this plots the q10% and q90%## Warning in predict.lm(lm_points, interval = "prediction", level = 0.5): predictions on current data refer to _future_ responses
ggplot(no_island,aes(T_area_countries_wsc,T_points))+geom_point(size=3,aes(col=polymorphism))+ geom_smooth(method = "lm",level=0.99)+geom_line(aes(y=upr),col="red")+geom_line(aes(y=lwr),col="red")+theme_bw()## `geom_smooth()` using formula = 'y ~ x'
Number of colour monomorphic and polymorphic species after filtering
##
## no yes
## 35 23
Correlation plot between the variables
cor_matrix <- cor(na.omit(pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))
colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )Standardize variable before the analysis, excluding the count variables
#pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()Now, let’s prepare the dataset and tree so they match, this is super important. Your phylogeny names need to match a column of data
Let’s run the models!
Evaluate if the colour monomorphic and colour polymorphic species differ in the number of records
set.seed(30011994)
brm_points <- brm(
n_points ~ polymorphism,
data = pi_subset,
family = negbinomial(),
#prior = prior,
#data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="points2.rda"
)
pairs(brm_points)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: n_points ~ polymorphism
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 7.50 0.17 7.17 7.86 1.00 27591 20766
## polymorphismyes 0.53 0.28 -0.00 1.09 1.00 26398 21795
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 0.98 0.16 0.69 1.33 1.00 28316 23982
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.065 (95% CI [9.964e-11, 0.229])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in the number of records
Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid
set.seed(30011994)
brm_centroid <- brm(
T_centroid_lat ~ polymorphism,
data = pi_subset,
family = skew_normal(),
#prior = prior,
#data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="centroids2.rda"
)
pairs(brm_centroid)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_centroid_lat ~ polymorphism
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 41.21 2.87 35.50 46.75 1.00 22737 21496
## polymorphismyes -4.21 4.17 -12.37 4.10 1.00 25556 23960
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 18.02 1.81 14.87 21.91 1.00 20726 21331
## alpha -3.30 1.44 -6.18 -0.02 1.00 21378 14702
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in the latitude of the centroid
Evaluate if the colour monomorphic and colour polymorphic species differ in the body length
set.seed(30011994)
brm_bole <- brm(
T_bole ~ polymorphism,
data = pi_subset,
family = gaussian(),
#prior = prior,
#data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="bole2.rda"
)
pairs(brm_bole)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_bole ~ polymorphism
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.11 0.12 1.89 2.34 1.00 26598 22972
## polymorphismyes 0.17 0.18 -0.19 0.53 1.00 26092 21577
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.68 0.07 0.57 0.83 1.00 24965 22613
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.018 (95% CI [7.630e-11, 0.105])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in body length
let’s Evaluate the association fo the predictors
lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=pi_subset)
check_collinearity(lm_lat_range)The predictors are not collinear, we can use all of them in the models
set.seed(30011994)
brm_latrange_2 <- brm(
T_lat_range ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = student(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="latrange2.rda"
)
pairs(brm_latrange_2)## Warning: There were 2 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: student
## Links: mu = identity; sigma = identity; nu = identity
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 58)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.51 0.38 0.02 1.35 1.00 2282 2244
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 6.77 0.88 5.01 8.50 1.00 21853 21648
## polymorphismyes 0.44 0.30 -0.16 1.03 1.00 32376 27705
## T_bole 0.10 0.27 -0.43 0.62 1.00 28391 28281
## T_centroid_lat -0.04 0.01 -0.06 -0.02 1.00 16499 17343
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.88 0.18 0.45 1.20 1.00 2843 2146
## nu 20.30 13.87 3.11 55.34 1.00 29240 14341
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.445 (95% CI [0.190, 0.847])
## Marginal R2: 0.371 (95% CI [0.172, 0.527])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
area_polygon_2 <- brm(
T_area_polygon ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)),
data = pi_subset,
family = skew_normal(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="areapolygon2.rda"
)
pairs(area_polygon_2)## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 58)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 485.26 291.49 26.87 1102.46 1.00 4905 7788
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2251.13 861.99 559.90 3967.39 1.00 28479 27991
## polymorphismyes 223.15 318.85 -389.81 868.10 1.00 25841 26459
## T_bole 216.46 265.29 -302.04 735.11 1.00 26134 27648
## T_centroid_lat -0.72 11.27 -22.87 21.65 1.00 30983 29762
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1051.28 151.46 747.52 1348.52 1.00 6977 7184
## alpha 3.53 2.20 -0.57 8.63 1.00 15454 19393
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.187 (95% CI [0.006, 0.488])
## Marginal R2: 0.066 (95% CI [5.615e-04, 0.197])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records
set.seed(30011994)
eco_reg_points_2 <- brm(
eco_reg_points ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="ecoreg_points2.rda"
)
pairs(eco_reg_points_2)## Family: poisson
## Links: mu = log
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 58)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.89 0.10 0.71 1.12 1.00 7911 13420
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.46 0.61 1.25 3.64 1.00 8006 13396
## polymorphismyes 0.31 0.18 -0.05 0.67 1.00 8540 13143
## T_bole 0.41 0.19 0.04 0.79 1.00 9042 14262
## T_centroid_lat -0.00 0.01 -0.01 0.01 1.00 8350 13036
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.963 (95% CI [0.943, 0.979])
## Marginal R2: 0.146 (95% CI [0.002, 0.407])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon
eco_reg_polygon_2 <- brm(
eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="ecoreg_pol2.rda"
)
pairs(eco_reg_polygon_2)## Family: poisson
## Links: mu = log
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 58)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.03 0.11 0.84 1.28 1.00 6840 14036
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.45 0.68 2.12 4.77 1.00 6337 12509
## polymorphismyes 0.31 0.20 -0.08 0.71 1.00 6977 12265
## T_bole 0.31 0.21 -0.10 0.73 1.00 7053 11859
## T_centroid_lat -0.01 0.01 -0.02 0.01 1.00 6176 11849
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.980 (95% CI [0.969, 0.988])
## Marginal R2: 0.133 (95% CI [6.729e-04, 0.418])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here
temp_zones_points_2 <- brm(
temp_zones_points~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="clim_points2.rda"
)
pairs(temp_zones_points_2)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 58)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.08 0.06 0.00 0.23 1.00 15567 18304
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.57 0.26 1.06 2.08 1.00 39201 28484
## polymorphismyes 0.11 0.10 -0.08 0.30 1.00 50836 31078
## T_bole 0.24 0.08 0.07 0.40 1.00 41984 29099
## T_centroid_lat 0.00 0.00 -0.00 0.01 1.00 45141 31680
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 99.60 75.08 21.38 299.95 1.00 53987 32100
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.293 (95% CI [0.096, 0.471])
## Marginal R2: 0.250 (95% CI [0.059, 0.422])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
temp_zones_polygon_2 <- brm(
temp_zones_polygon~polymorphism+T_bole+T_centroid_lat+ (1| gr(species, cov = A)) ,
data = pi_subset,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="clim_pol2.rda"
)
pairs(temp_zones_polygon_2)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 58)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 58)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.13 0.09 0.01 0.35 1.00 9703 13603
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.20 0.33 2.57 3.86 1.00 39482 28967
## polymorphismyes -0.09 0.12 -0.33 0.16 1.00 49267 30590
## T_bole -0.01 0.11 -0.23 0.19 1.00 36329 28960
## T_centroid_lat -0.01 0.00 -0.02 -0.01 1.00 42875 30475
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 11.37 8.71 4.67 28.83 1.00 20958 13902
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.399 (95% CI [0.183, 0.561])
## Marginal R2: 0.335 (95% CI [0.133, 0.519])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
all_models<-NULL
for(mod in c("brm_latrange_2","area_polygon_2","eco_reg_points_2","eco_reg_polygon_2","temp_zones_polygon_2","temp_zones_points_2")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
labs(x="Estimate",y="Models")Plot of the model
paleta1<-c("#1ABEC6","#FF5B00")
dataset_plot<-pi_subset %>% drop_na(T_lat_range|polymorphism|T_bole|T_centroid_lat)
dataset_plot$predict<-predict(brm_latrange_2,type="response")[,"Estimate"]
dataset_plot %>% ggplot(aes(x=polymorphism,y=predict,fill=polymorphism))+geom_point(aes(x=polymorphism,y=T_lat_range),shape = 21,size=3, position = position_jitterdodge(),alpha=0.5)+geom_violin(aes(x=polymorphism,y=T_lat_range),alpha=0.1, position = position_dodge(width = .75),size=1)+
stat_summary(fun = mean,aes(color = polymorphism,group=polymorphism),fun.min = function(x) mean(x) - (2*se(x)),fun.max = function(x) mean(x)+(2*se(x)),geom = "pointrange",shape=22,size=1.5,col="black")+scale_fill_manual(values=paleta1)+scale_colour_manual(values=paleta1)+theme_classic()+labs(x="Colour polymorphism",y="Latitudinal range")We observed that the models with continous variables have values close to 0 and that the climatic zones models have random effects low variaces close to 0. This means that the phylogentic relationships of the individuals are not having a major effect on these models.
hyp <- "sd_species__Intercept^2 / (sd_species__Intercept^2 + sigma^2) = 0"
lat_range_sig <- hypothesis(brm_latrange_1, hyp, class = NULL)
area_sig<-hypothesis(area_polygon_1, hyp, class = NULL)
ggplot() + geom_histogram(aes(x = lat_range_sig$samples$H1, fill="Latitudinal range model"), alpha = 0.5)+
geom_histogram(aes(x = area_sig$samples$H1,fill="Area polygon model"), alpha = 0.5)+labs(x="Pagel's lambda",y="Count")+theme_classic()## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
In consequence, we decided to run a set of models without considering the phylogeny. This let us include more species with good geographic records but that were not include in the phylogenetic reconstruction due to lack of genetic data.
join_dataset<-read_csv("data_total.csv",col_names=T)
join_dataset$species<-gsub(pattern=" ", replacement="_",join_dataset$species)
#keep unique rows
join_dataset<-distinct(join_dataset,species,.keep_all = TRUE)
###Tranform island as binary
join_dataset<-join_dataset %>% mutate(bin_island=ifelse(cat_island=="island"|cat_island=="island_continent",1,0))
#Let's modify the dataset to deal with polytipic
join_dataset$polymorphism[which(join_dataset$polymorphism %in% c("polytipic","possible polytipic","pattern variable")==TRUE)]<-NAArachnids is one of the groups with the poorest geographic information available in public databases. For instance, in our data ~52% of the species has less than 50 geographical records
species_points<-join_dataset %>% drop_na(n_points)
species_geo<-nrow(species_points[species_points$n_points<50,])/nrow(species_points)*100
print(paste0(species_geo,"%"," of the species with geographical information has less than 50 geographical records"))## [1] "52.0179372197309% of the species with geographical information has less than 50 geographical records"
To account for this, we decided to calculated the mean and its 95% confidence interval (CI) for the number of geographical records available for all the species. We excluded species from the subsequent analyses that fell outside the lower CI.
#Due to high vari
l_points<-na.omit(log(join_dataset$n_points))
##Let's calculate the 95% around the mean
library(boot)
data <- data.frame(xs = l_points)
bo <- boot(data[, "xs", drop = FALSE], statistic=meanfun, R=5000)
mean_ci<-boot.ci(bo, conf=0.95, type="bca")
ggplot(tibble(x=bo$t[,1]), aes(x=x)) +geom_density()+geom_segment(x=mean_ci$bca[4],xend=mean_ci$bca[5],y=0,yend=0,color="blue",size=2,lineend="round")##Remove species with low number of records
datos_filtered<-join_dataset %>% filter(n_points>=exp(mean_ci$bca[4]))
#datos_filtered<-na.omit(datos_filtered)
##Let's save this dataset for further analyses
data_without_filtering<-datos_filteredall the predictors seems skewed or not uniform distributed, let’s modify some predictors that may affect the regression due to their non-normal distribution
datos_filtered$T_centroid_lat<-abs(datos_filtered$centroid_lat)
datos_filtered$T_lat_range<-sqrt(abs(datos_filtered$lat_range))
datos_filtered$T_area_polygon<-sqrt(datos_filtered$area_polygon+1)
datos_filtered$T_lat_range_wsc<-abs(datos_filtered$lat_range_wsc)
datos_filtered$T_area_countries_wsc<-sqrt(datos_filtered$area_countries_wsc)
datos_filtered$T_points<-log(datos_filtered$n_points)
datos_filtered$T_bole<-log(datos_filtered$bole_female)Correlation plot between the variables
cor_matrix <- cor(na.omit(datos_filtered[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))
colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )Remove species from islands that can affect calculations due to their geographic limit for dispersion
Standardize variable before the analysis, excluding the count variables
#datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()Number of colour monomorphic and polymorphic species
##
## no yes
## 59 41
Let’s run the models!
Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid
set.seed(30011994)
brm_centroid <- brm(
T_centroid_lat ~ polymorphism,
data = datos_filtered_total,
family = skew_normal(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="centroids3.rda"
)
pairs(brm_centroid)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_centroid_lat ~ polymorphism
## Data: datos_filtered_total (Number of observations: 99)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 36.68 2.45 31.87 41.49 1.00 29452 25133
## polymorphismyes -8.88 3.83 -16.41 -1.34 1.00 28441 25674
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 18.78 1.46 16.21 21.96 1.00 22399 21133
## alpha -0.26 1.62 -3.76 2.82 1.00 19855 16728
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.055 (95% CI [3.434e-11, 0.142])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in the latitude of the centroid
Evaluate if the colour monomorphic and colour polymorphic species differ in the body length
set.seed(30011994)
brm_bole <- brm(
T_bole ~ polymorphism,
data = datos_filtered_total,
family = gaussian(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="bole3.rda"
)
pairs(brm_bole)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_bole ~ polymorphism
## Data: datos_filtered_total (Number of observations: 99)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.12 0.08 1.97 2.28 1.00 31494 24510
## polymorphismyes 0.15 0.13 -0.09 0.40 1.00 31013 24525
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.62 0.05 0.54 0.71 1.00 30166 24838
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.016 (95% CI [1.237e-10, 0.078])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
They do not have differences in their body length
let’s Evaluate the association fo the predictors
lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=datos_filtered_total)
check_collinearity(lm_lat_range)The predictors are not collinear, we can use all of them in the models
set.seed(30011994)
brm_latrange_3 <- brm(
T_lat_range ~ polymorphism+T_bole+T_centroid_lat,
data = datos_filtered_total,
family = gaussian(),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="latrange3.rda"
)
pairs(brm_latrange_3)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 98)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 5.55 0.70 4.19 6.92 1.00 26745 24185
## polymorphismyes 0.33 0.29 -0.23 0.89 1.00 33868 26572
## T_bole 0.27 0.24 -0.21 0.75 1.00 30868 28291
## T_centroid_lat -0.03 0.01 -0.04 -0.01 1.00 30220 26746
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.34 0.10 1.17 1.55 1.00 32104 26514
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.198 (95% CI [0.080, 0.315])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
area_polygon_3 <- brm(
T_area_polygon ~ polymorphism+T_bole+T_centroid_lat,
data =datos_filtered_total,
family = skew_normal(),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
,file="areapoly3.rda"
)
pairs(area_polygon_3)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 97)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1489.07 606.43 274.80 2662.32 1.00 23322 25656
## polymorphismyes 245.96 241.94 -227.62 726.34 1.00 28142 25177
## T_bole 298.34 209.63 -106.13 717.41 1.00 27849 26531
## T_centroid_lat 7.36 7.36 -6.94 21.84 1.00 25571 26897
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1262.95 101.77 1081.23 1480.30 1.00 21835 22705
## alpha 3.34 1.25 1.45 6.30 1.00 21364 15085
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.045 (95% CI [5.415e-04, 0.119])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records
set.seed(30011994)
eco_reg_points_3 <- brm(
eco_reg_points ~ polymorphism+T_bole+T_centroid_lat,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="ecoreg_points3.rda"
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 98)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.95 0.38 1.20 2.70 1.00 27427 27002
## polymorphismyes 0.25 0.16 -0.06 0.55 1.00 36094 27025
## T_bole 0.60 0.13 0.34 0.86 1.00 31383 28543
## T_centroid_lat -0.00 0.00 -0.01 0.01 1.00 30568 27635
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 1.94 0.28 1.44 2.54 1.00 34757 28371
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.192 (95% CI [0.071, 0.334])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon
eco_reg_polygon_3 <- brm(
eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file="ecoreg_pol3.rda"
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 97)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.44 0.38 2.71 4.18 1.00 29834 26984
## polymorphismyes 0.38 0.17 0.05 0.71 1.00 32037 27029
## T_bole 0.39 0.14 0.11 0.67 1.00 32276 28756
## T_centroid_lat -0.01 0.00 -0.02 -0.00 1.00 31393 26553
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 1.71 0.24 1.27 2.23 1.00 32882 28190
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.194 (95% CI [0.071, 0.337])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here
temp_zones_points_3 <- brm(
temp_zones_points~ polymorphism+T_bole+T_centroid_lat ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 10000
,file="temp_points3.rda"
)
pairs(temp_zones_points_3)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 98)
## Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup draws = 20000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.21 0.21 0.79 1.62 1.00 11682 11514
## polymorphismyes 0.14 0.09 -0.03 0.30 1.00 14716 13505
## T_bole 0.30 0.07 0.16 0.44 1.00 12891 12256
## T_centroid_lat 0.01 0.00 0.00 0.01 1.00 16263 13986
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 33.93 32.20 9.94 119.91 1.00 11751 9110
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.199 (95% CI [0.073, 0.333])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
temp_zones_polygon_3 <- brm(
temp_zones_polygon~polymorphism+T_bole+T_centroid_lat ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 10000
,file="temp_pol3.rda"
)
pairs(temp_zones_polygon_3)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 97)
## Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup draws = 20000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.76 0.23 2.31 3.21 1.00 13969 11694
## polymorphismyes -0.06 0.10 -0.25 0.12 1.00 16576 14449
## T_bole 0.07 0.08 -0.09 0.23 1.00 15339 12730
## T_centroid_lat -0.01 0.00 -0.01 -0.00 1.00 16768 13725
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 8.16 2.22 4.83 13.45 1.00 17419 12693
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.189 (95% CI [0.060, 0.320])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
all_models<-NULL
for(mod in c("brm_latrange_3","area_polygon_3","temp_zones_polygon_3","temp_zones_points_3")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
labs(x="Estimate",y="Models")Considering that the presence on islands can be an indicator of range expansion, we also recorded whether species were distributed on islands by overlapping both sources of geographical distribution (occurrences and WSC) with the global shoreline vector from the islands database (Sayre et al., 2019).
We decided to test the association between colour polymorphism and the presence on islands using two datasets. The first one was using the species occurrences after discarding those species with poor geographic records as done here
##Remove species with low number of records
data_filtered_phylogeny<-data_filtered_phylogeny %>% drop_na(polymorphism) %>% drop_na(bin_island)
table(data_filtered_phylogeny$polymorphism)##
## no yes
## 60 35
This dataset includes 56 colour monomorphic species and 35 colour polymorphic species.
To increase the number of species in our analysis, we created a second dataset without filtering species based on the number of geographic records. We determined the presence on islands using the geographic distribution information available on the World Spider Catalog.
dataset_all_species_phylogeny<-dataset_all_species_phylogeny %>% drop_na(n_points) %>%drop_na(polymorphism) %>% drop_na(bin_island) %>% drop_na(cat_island_points)
table(dataset_all_species_phylogeny$polymorphism)##
## no yes
## 131 61
This second dataset includes 125 colour monomorphic species and 61 colour polymorphic species.
## Family: bernoulli
## Links: mu = logit
## Formula: bin_island ~ 1 + polymorphism + bole_female + (1 | gr(species, cov = A))
## Data: data_filtered_phylogeny (Number of observations: 94)
## Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup draws = 20000
##
## Group-Level Effects:
## ~species (Number of levels: 94)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 4.12 3.84 0.46 13.24 1.00 2525 3254
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.74 2.88 -1.75 8.67 1.00 7705 4824
## polymorphismyes 4.65 3.37 0.94 13.14 1.00 4488 2999
## bole_female 0.07 0.16 -0.15 0.43 1.00 6068 3615
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.481 (95% CI [0.076, 0.872])
## Marginal R2: 0.011 (95% CI [1.128e-24, 0.217])
With this dataset there seems to be an association between
colour polymorphism and the presence on islands
## .
## 0 1
## 0.1864407 0.8135593
## .
## 0 1
## 0.02857143 0.97142857
## standardGeneric for "plot" defined from package "base"
##
## function (x, y, ...)
## standardGeneric("plot")
## <environment: 0x0000000026a4aad8>
## Methods may be defined for arguments: x, y
## Use showMethods(plot) for currently available ones.
tree=check_and_fix_ultrametric(join_tree)
missing<-tree$tip.label[which(tree$tip.label %in% dataset_all_species_phylogeny$species==FALSE)]
island_dataset_tree<-drop.tip(tree,missing)
island_dataset_tree$edge.length <- island_dataset_tree$edge.length / (max(island_dataset_tree$edge.length))
A <- ape::vcv(island_dataset_tree,corr = TRUE)
set.seed(30011994)
dataset_all_species_phylogeny$log_bole_female<-log(dataset_all_species_phylogeny$bole_female)
dataset_all_species_phylogeny<-dataset_all_species_phylogeny[!is.na(dataset_all_species_phylogeny$log_bole_female),]
brm_island_2 <- brm(
bin_island ~ 1 + polymorphism + bole_female + (1| gr(species, cov = A)) ,
data = dataset_all_species_phylogeny,
family = bernoulli(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
,file = "island2.rda"
)
pairs(brm_island_2)## Family: bernoulli
## Links: mu = logit
## Formula: bin_island ~ 1 + polymorphism + bole_female + (1 | gr(species, cov = A))
## Data: dataset_all_species_phylogeny (Number of observations: 190)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 190)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 10.79 22.52 1.67 58.54 1.00 2294 2437
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.69 9.87 -25.50 1.94 1.00 4126 2473
## polymorphismyes 3.39 6.78 0.17 16.54 1.00 4427 2685
## bole_female 0.46 0.87 0.05 2.33 1.00 3341 2533
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.669 (95% CI [0.399, 1.000])
## Marginal R2: 0.214 (95% CI [2.796e-13, 0.412])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
With this dataset there seems to be an association between colour polymorphism and the presence on islands
## .
## 0 1
## 0.4573643 0.5426357
## .
## 0 1
## 0.1803279 0.8196721
A similar pattern is obtained when with run the model without the phylogeny
set.seed(30011994)
brm_island_3 <- brm(
bin_island ~ 1 + polymorphism + bole_female,
data = dataset_all_species_phylogeny,
family = bernoulli(),
#prior = prior,
#data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
## Family: bernoulli
## Links: mu = logit
## Formula: bin_island ~ 1 + polymorphism + bole_female
## Data: dataset_all_species_phylogeny (Number of observations: 190)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.72 0.31 -1.35 -0.12 1.00 29369 24091
## polymorphismyes 1.23 0.39 0.47 2.02 1.00 25518 22547
## bole_female 0.11 0.03 0.05 0.18 1.00 23714 21102
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.139 (95% CI [0.068, 0.215])
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
Lets observe how polymorphic and monomorphic species differ according to all the predictor. To build this graph we will use the linear models with the total data set and the Island model with the highest number of species.
all_models<-NULL
for(mod in c("brm_latrange_1","area_polygon_1","eco_reg_points_1","eco_reg_polygon_1","temp_zones_polygon_1","temp_zones_points_1","brm_island_2")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models$model<-factor(all_models$model,levels=c("brm_island_2","temp_zones_polygon_1","temp_zones_points_1","eco_reg_polygon_1","eco_reg_points_1","area_polygon_1","brm_latrange_1"))
na.omit(all_models) %>% ggplot(aes(y = model, x = b_polymorphismyes, fill = after_stat(x < 0))) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=0.8)+
labs(x="Estimate",y="Models")+
xlim(-0.5,5)+
scale_fill_manual(values = c("#BCCAEF","#D66D79" ))## Warning: Removed 45036 rows containing missing values or values outside the scale range
## (`stat_slabinterval()`).
From all the variables and data filtering explored, we can conclude that monomorphic and polymorphic spider species in our dataset only differ in their presence on islands